/*
Create the tables found in the CJIVE paper

Centering Values
1000 iterations
9142 observations (10% Sample)
146 Judges
438 Clusters (3 / Judge)
sigvc = 0.5
rho = 0.5
alpha = 1
gamma = 1

Created by Samuel McIntyre
January 2024
spm42@byu.edu

*/


********************************************
* If you don't want to run the code from the master do-file (JFEguide_masterscript), update the root directory filepath global here. If you run the code from the master do-file, it will control the root directory filepath and ignore the filepath below.
* If you don't want to run the code from the master do-file (JFEguide_masterscript), update the root directory filepath global here

if "$master" == ""	{
****************************
*** UPDATE FILEPATH HERE ***
****************************
global root_dir /Users/andersee/Dropbox/cluster_jackknife/restat/cjive_replication_package
*****************************

*** subfolders
global raw_data  $root_dir/raw_data
global intermediate_data $root_dir/intermediate_data
global clean_data $root_dir/clean_data
global tabfig $root_dir/tabfig
global logfiles $root_dir/logfiles
}
********************************************
set seed 242

cap net install grc1leg, from( http://www.stata.com/users/vwiggins/) 


cap log close
log using $logfiles/paper_simulations.log, replace


local start_time_simulations = clock(c(current_date) + " " + c(current_time), "DMYhms")
display "Start time simulations: " %tcCCYY-NN-DD_HH:MM:SS `start_time_simulations'

/**********************************************************
						Setup
**********************************************************/

*Get the programs in memory
do $root_dir/paper_simulations_code.do
do cjive.ado

*Set baseline local variables
local B = 1000
local n = 9142
local k = 146 
local c = 438 
local sigvc = 0.5
local rho = 0.5
local alpha = 1
local gamma = 1

/**********************************************************
						Experiments
**********************************************************/

*Examine Cluster Variations
forvalues i =  3(2)15 {
	local exp = `k' * `i'
	cjive_simulations `B' `n' `k' `exp' `sigvc' `rho' `alpha' `gamma'
	save "$intermediate_data/cluster_`i'.dta", replace
}

*Examine Judge Variations
forvalues i =  1/7 {
	local exp = ceil(`k' * `i' / 5)
	cjive_simulations `B' `n' `exp' `c' `sigvc' `rho' `alpha' `gamma'
	save "$intermediate_data/judge_`i'.dta", replace
}

*Examine Sigvc Variations
forvalues i =  1/9 {
	local exp = `i' / 10
	cjive_simulations `B' `n' `k' `c' `exp' `rho' `alpha' `gamma'
	save "$intermediate_data/sigvc_`i'.dta", replace
}

*Examine Rho Variations
forvalues i =  1/10 {
	local exp = `i' / 20
	cjive_simulations `B' `n' `k' `c' `sigvc' `exp' `alpha' `gamma'
	save "$intermediate_data/rho_`i'.dta", replace
}


/**********************************************************
					Prepare Data
**********************************************************/

// FIXME - do not need to repeat. Also, consider global variables?
*Set baseline local variables
local B = 1000
local n = 9142
local k = 146 
local c = 438 
local sigvc = 0.5
local rho = 0.5
local alpha = 1
local gamma = 1


*Collapse Cluster
forvalues i =  3(2)15 {
	use "$intermediate_data/cluster_`i'.dta", clear
	foreach est in "ols" "2sls" "liml" "jive" "jkc"{
		qui sum bias`est'
		scalar se`est'2 = r(sd)
	}
	collapse (mean) *
	foreach est in "ols" "2sls" "liml" "jive" "jkc"{
		gen se`est'ratio = se`est' / se`est'2
		gen sd`est' = se`est'2
	}
	save "$intermediate_data/cluster_`i'_coll.dta", replace
}

*Append Clusters
use "$intermediate_data/cluster_3_coll.dta", clear
gen n = `k'*3
local iter = 1
forvalues i = 5(2)15{
	local iter = `iter' + 1
	append using "$intermediate_data/cluster_`i'_coll.dta"
	replace n = `i'*`k' if `iter' == _n
}

order biasjkc biasjive biasliml bias2sls biasols sejkc sejive seliml se2sls seols sejkcratio sejiveratio selimlratio se2slsratio seolsratio coveredjkc coveredjive coveredliml covered2sls coveredols n
save "$clean_data/cluster_full.dta", replace


*Collapse Judge
forvalues i =  1/7 {
	use "$intermediate_data/judge_`i'.dta", clear
	foreach est in "ols" "2sls" "liml" "jive" "jkc"{
		qui sum bias`est'
		scalar se`est'2 = r(sd)
	}
	collapse (mean) *
	foreach est in "ols" "2sls" "liml" "jive" "jkc"{
		gen se`est'ratio = se`est' / se`est'2
		gen sd`est' = se`est'2
	}
	save "$intermediate_data/judge_`i'_coll.dta", replace
}
*Append Judges
use "$intermediate_data/judge_1_coll.dta", clear
gen n = ceil(`k' /  5)
local iter = 1
forvalues i = 2/7{
	local iter = `iter' + 1
	append using "$intermediate_data/judge_`i'_coll.dta"
	replace n = ceil(`k' * `i' / 5) if `iter' == _n
}

order biasjkc biasjive biasliml bias2sls biasols sejkc sejive seliml se2sls seols sejkcratio sejiveratio selimlratio se2slsratio seolsratio coveredjkc coveredjive coveredliml covered2sls coveredols n
save "$clean_data/judge_full.dta", replace


*Collapse Sigvc
forvalues i =  1/9 {
	use "$intermediate_data/sigvc_`i'.dta", clear
	foreach est in "ols" "2sls" "liml" "jive" "jkc"{
		qui sum bias`est'
		scalar se`est'2 = r(sd)
	}
	collapse (mean) *
	foreach est in "ols" "2sls" "liml" "jive" "jkc"{
		gen se`est'ratio = se`est' / se`est'2
		gen sd`est' = se`est'2
	}
	save "$intermediate_data/sigvc_`i'_coll.dta", replace
}
*Append Sigvc
use "$intermediate_data/sigvc_1_coll.dta", clear
gen n = 1 / 10
local iter = 1
forvalues i = 2/9{
	local iter = `iter' + 1
	append using "$intermediate_data/sigvc_`i'_coll.dta"
	replace n = `i'/10 if `iter' == _n
}
order biasjkc biasjive biasliml bias2sls biasols sejkc sejive seliml se2sls seols sejkcratio sejiveratio selimlratio se2slsratio seolsratio coveredjkc coveredjive coveredliml covered2sls coveredols n
save "$clean_data/sigvc_full.dta", replace

*Collapse Rho
forvalues i =  1/10 {
	use "$intermediate_data/rho_`i'.dta", clear
	foreach est in "ols" "2sls" "liml" "jive" "jkc"{
		qui sum bias`est'
		scalar se`est'2 = r(sd)
	}
	collapse (mean) *
	foreach est in "ols" "2sls" "liml" "jive" "jkc"{
		gen se`est'ratio = se`est' / se`est'2
		gen sd`est' = se`est'2
	}
	save "$intermediate_data/rho_`i'_coll.dta", replace
}
*Append Rho
use "$intermediate_data/rho_1_coll.dta", clear
gen n = 1 / 20
local iter = 1
forvalues i = 2/10{
	local iter = `iter' + 1
	append using "$intermediate_data/rho_`i'_coll.dta"
	replace n = `i'/20 if `iter' == _n
}
order biasjkc biasjive biasliml bias2sls biasols sejkc sejive seliml se2sls seols sejkcratio sejiveratio selimlratio se2slsratio seolsratio coveredjkc coveredjive coveredliml covered2sls coveredols n
save "$clean_data/rho_full.dta", replace


/**********************************************************
				Make Figures in Paper
**********************************************************/
*Clusters
use "$clean_data/cluster_full.dta", clear
keep biasjkc biasjive bias2sls sdjkc sdjive sd2sls sejkcratio sejiveratio se2slsratio coveredjkc coveredjive covered2sls n
sort n

twoway connected bias* n, yline(0) xtitle("Number of Clusters") legend(label(1 "Cluster Jacknife") label(2 "JIVE") label(3 "2SLS") region(lcolor(black) lwidth(medium))) lpattern(solid longdash dash_dot) msymbol(i i i) lwidth(thick thick thick) lcolor(black gray gray) graphregion(color(gs15))
graph save "$tabfig/bias_cluster", replace

twoway connected sdjkc sdjive sd2sls n, xtitle("Number of Clusters") legend(label(1 "Cluster Jacknife") label(2 "JIVE") label(3 "2SLS") region(lcolor(black) lwidth(medium))) lpattern(solid longdash dash_dot) msymbol(i i i) lwidth(thick thick thick) lcolor(black gray gray) graphregion(color(gs15))
graph save "$tabfig/sd_cluster", replace

twoway connected covered* n, yline(0.95) xtitle("Number of Clusters") legend(label(1 "Cluster Jacknife") label(2 "JIVE") label(3 "2SLS") region(lcolor(black) lwidth(medium))) lpattern(solid longdash dash_dot) msymbol(i i i) lwidth(thick thick thick) lcolor(black gray gray) graphregion(color(gs15))
graph save "$tabfig/covered_cluster", replace



*Judges
use "$clean_data/judge_full.dta", clear
keep biasjkc biasjive bias2sls sdjkc sdjive sd2sls sejkcratio sejiveratio se2slsratio coveredjkc coveredjive covered2sls n

twoway connected bias* n, yline(0) xtitle("Number of Instruments") legend(off) lpattern(solid longdash dash_dot) msymbol(i i i) lwidth(thick thick thick) lcolor(black gray gray) graphregion(color(gs15))
graph save "$tabfig/bias_judge", replace

twoway connected sdjkc sdjive sd2sls n, xtitle("Number of Instruments") legend(off) lpattern(solid longdash dash_dot) msymbol(i i i) lwidth(thick thick thick) lcolor(black gray gray) graphregion(color(gs15))
graph save "$tabfig/sd_judge", replace

twoway connected covered* n, yline(0.95) xtitle("Number of Instruments") legend(off) lpattern(solid longdash dash_dot) msymbol(i i i) lwidth(thick thick thick) lcolor(black gray gray) graphregion(color(gs15))
graph save "$tabfig/covered_judge", replace

*Sigvc
use "$clean_data/sigvc_full.dta", clear
keep biasjkc biasjive bias2sls sdjkc sdjive sd2sls sejkcratio sejiveratio se2slsratio coveredjkc coveredjive covered2sls n

twoway connected bias* n, yline(0) xtitle("Cluster Importance") legend(off) lpattern(solid longdash dash_dot) msymbol(i i i) lwidth(thick thick thick) lcolor(black gray gray) graphregion(color(gs15))
graph save "$tabfig/bias_sigvc", replace

twoway connected sdjkc sdjive sd2sls n, xtitle("Cluster Importance") legend(off) lpattern(solid longdash dash_dot) msymbol(i i i) lwidth(thick thick thick) lcolor(black gray gray) graphregion(color(gs15))
graph save "$tabfig/sd_sigvc", replace

twoway connected covered* n, yline(0.95) xtitle("Cluster Importance") legend(off) lpattern(solid longdash dash_dot) msymbol(i i i) lwidth(thick thick thick) lcolor(black gray gray) graphregion(color(gs15))
graph save "$tabfig/covered_sigvc", replace


*Rho
use "$clean_data/rho_full.dta", clear
keep biasjkc biasjive bias2sls sdjkc sdjive sd2sls sejkcratio sejiveratio se2slsratio coveredjkc coveredjive covered2sls n

twoway connected bias* n, yline(0) xtitle("Degree of Endogeneity") legend(off) lpattern(solid longdash dash_dot) msymbol(i i i) lwidth(thick thick thick) lcolor(black gray gray) graphregion(color(gs15))
graph save "$tabfig/bias_rho", replace

twoway connected sdjkc sdjive sd2sls n, xtitle("Degree of Endogeneity") legend(off) lpattern(solid longdash dash_dot) msymbol(i i i) lwidth(thick thick thick) lcolor(black gray gray) graphregion(color(gs15))
graph save "$tabfig/sd_rho", replace

twoway connected covered* n, yline(0.95) xtitle("Degree of Endogeneity") legend(off) lpattern(solid longdash dash_dot) msymbol(i i i) lwidth(thick thick thick) lcolor(black gray gray) graphregion(color(gs15))
graph save "$tabfig/covered_rho", replace


*Combine Graphs
grc1leg "$tabfig/bias_cluster.gph" "$tabfig/bias_judge.gph" "$tabfig/bias_sigvc.gph" "$tabfig/bias_rho.gph", ycommon legendfrom("$tabfig/bias_cluster.gph") title("Bias") graphregion(color(gs15))
graph save "$tabfig/bias_all.gph", replace
graph export "$tabfig/bias_all.png", replace
graph export "$tabfig/bias_all.pdf", replace


grc1leg "$tabfig/covered_cluster.gph" "$tabfig/covered_judge.gph" "$tabfig/covered_sigvc.gph" "$tabfig/covered_rho.gph", ycommon legendfrom("$tabfig/covered_cluster.gph") title("Confidence Interval Coverage") graphregion(color(gs15))
graph save "$tabfig/covered_all.gph", replace
graph export "$tabfig/covered_all.png", replace
graph export "$tabfig/covered_all.pdf", replace


grc1leg "$tabfig/sd_cluster.gph" "$tabfig/sd_judge.gph" "$tabfig/sd_sigvc.gph" "$tabfig/sd_rho.gph", ycommon legendfrom("$tabfig/sd_cluster.gph") title("Simulation Standard Deviation") graphregion(color(gs15))
graph save "$tabfig/sd_all.gph", replace
graph export "$tabfig/sd_all.png", replace
graph export "$tabfig/sd_all.pdf", replace


* Calculate runtime of do-file
local end_time_simulations = clock(c(current_date) + " " + c(current_time), "DMYhms")
display "End time simulations: " %tcCCYY-NN-DD_HH:MM:SS `end_time_simulations'

	* Calculate time difference in milliseconds
	local duration_ms = `end_time_simulations' - `start_time_simulations'

	* Convert milliseconds to hours (1 hour = 3,600,000 ms)
	local duration_hours = `duration_ms' / 3600000

	* Display the result
	display "Duration simulations: " `duration_hours' " hours"


log close
